# -*- coding: utf-8 -*-
import random
from zc_core.spiders.base import BaseSpider
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request
from guotie.rules import *


class SkuSpider(BaseSpider):
    """Crawl the category tree of mall.95306.cn and the SKU listing pages
    of every level-3 category.

    Flow:
      1. ``start_requests`` fetches the full category list.
      2. ``parse_catalog`` emits the categories and requests page 1 of the
         SKU listing for each level-3 category.
      3. ``parse_sku_list`` emits the first page and schedules the remaining
         pages (capped at ``max_page_limit``).
      4. ``parse_more_sku`` emits each follow-up page.

    ``self.batch_no``, ``self.logger`` and ``self.error_back`` are not defined
    in this class; presumably they are provided by ``BaseSpider`` -- verify
    against the base class.
    """
    name = 'sku'
    # Frequently used endpoints
    cat_url = "https://mall.95306.cn/proxy/item/mall/frontcategory/getAllCategoryList?platformId=20&businessType=1"
    sku_list_url = 'https://mall.95306.cn/proxy/item/mall/search/queryItemListByCid?businessType=1&cid={}&platformId=20&pageNum={}'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider.

        :param batchNo: batch identifier forwarded unchanged to ``BaseSpider``.
        """
        super(SkuSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Upper bound on the number of listing pages requested per category
        self.max_page_limit = 300
        # Value sent in the ``Authorization`` header; set by start_requests()
        self.authorization = None

    def _build_headers(self):
        """Return the browser-like header set shared by every request."""
        return {
            'Host': 'mall.95306.cn',
            'Connection': 'keep-alive',
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'Authorization': self.authorization,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3775.400 QQBrowser/10.6.4208.400',
            'Referer': 'https://mall.95306.cn/mall-view/',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
        }

    def _sku_list_request(self, cat_id, page, callback):
        """Build the request for one SKU listing page of one category.

        :param cat_id: category id substituted into ``sku_list_url``.
        :param page: 1-based page number substituted into ``sku_list_url``.
        :param callback: spider method that will parse the response.
        :return: a ``Request`` carrying ``cat_id`` and ``page`` in its meta.
        """
        return Request(
            url=self.sku_list_url.format(cat_id, page),
            headers=self._build_headers(),
            meta={
                # NOTE(review): listing requests are tagged 'catalog', same as
                # the category request -- confirm this is what downstream
                # components expect before changing it.
                'reqType': 'catalog',
                'batchNo': self.batch_no,
                'cat_id': cat_id,
                'page': page,
            },
            callback=callback,
            errback=self.error_back
        )

    def start_requests(self):
        """Yield the initial request for the full category list.

        Aborts (yielding nothing) when no ``st`` cookie is available, because
        that value is also used as the ``Authorization`` header.
        """
        # NOTE(review): session cookie values are hard-coded here; consider
        # loading them from configuration or a login step.
        cookies = {'st': '0e19cec81469c2f21d52d7d490ea7bba', 'AlteonPmall': '0a03b7f8ce46a8361f41'}
        if not cookies or 'st' not in cookies:
            self.logger.error('init cookie failed...')
            return
        self.authorization = cookies.get('st')
        self.logger.info('init cookie: %s', cookies)

        yield Request(
            url=self.cat_url,
            headers=self._build_headers(),
            meta={
                'reqType': 'catalog',
                'batchNo': self.batch_no,
            },
            cookies=cookies,
            callback=self.parse_catalog,
            errback=self.error_back,
            priority=100
        )

    def parse_catalog(self, response):
        """Parse the category list, emit it, and request page 1 of every
        level-3 category's SKU listing.

        Yields one ``Box('catalog', ...)`` holding all parsed categories,
        followed by one listing request per level-3 category. Yields nothing
        for an empty response; logs an error when no category was parsed.
        """
        if not (response and response.text):
            return

        # Module-level parser (not this method); it is not defined in this
        # file -- presumably supplied by the ``guotie.rules`` star import.
        cats = parse_catalog(response)
        if not cats:
            self.logger.error('无分类: [%s] url -> [%s]', self.batch_no, self.cat_url)
            return

        yield Box('catalog', self.batch_no, cats)

        # Request products. Shuffle a separate list so that the list already
        # handed out inside the Box above is not reordered behind its back.
        leaf_cats = [cat for cat in cats if cat.get('level') == 3]
        random.shuffle(leaf_cats)
        for cat in leaf_cats:
            # Fetch the first SKU page of this category
            yield self._sku_list_request(cat.get('catalogId'), 1, self.parse_sku_list)

    def parse_sku_list(self, response):
        """Parse the first listing page of a category and schedule the rest.

        Yields a ``Box('sku', ...)`` with the SKUs of this page, then one
        request per remaining page, from page 2 up to the smaller of the
        reported page count and ``max_page_limit``. An empty page is only
        logged.
        """
        meta = response.meta
        cat3_id = meta.get("cat_id")
        cur_page = meta.get("page")

        sku_list, total_pages = parse_sku_list(response)
        if not sku_list:
            self.logger.info('空页: cat=%s, page=%s', cat3_id, cur_page)
            return

        self.logger.info('清单: cat=%s, page=%s, count=%s', cat3_id, cur_page, len(sku_list))
        yield Box('sku', self.batch_no, sku_list)

        # A missing page count means there is nothing beyond this page;
        # normalising it avoids a TypeError in the comparison below.
        total_pages = total_pages or 0

        last_page = total_pages
        if total_pages > self.max_page_limit:
            self.logger.info('页数超限: cat=%s, total=%s', cat3_id, total_pages)
            # Enforce the limit instead of merely reporting it
            last_page = self.max_page_limit
        self.logger.info('页数: cat=%s, total=%s', cat3_id, total_pages)

        # Follow-up page requests
        for page in range(2, last_page + 1):
            yield self._sku_list_request(cat3_id, page, self.parse_more_sku)

    def parse_more_sku(self, response):
        """Parse a follow-up listing page (page 2 and later).

        Yields a ``Box('sku', ...)`` with the SKUs of the page; an empty page
        is only logged. The page count returned by the parser is ignored here
        because pagination was already scheduled by ``parse_sku_list``.
        """
        meta = response.meta
        cat3_id = meta.get("cat_id")
        cur_page = meta.get("page")

        sku_list, _total_pages = parse_sku_list(response)
        if sku_list:
            self.logger.info('清单: cat=%s, page=%s, count=%s', cat3_id, cur_page, len(sku_list))
            yield Box('sku', self.batch_no, sku_list)
        else:
            self.logger.info('空页: cat=%s, page=%s', cat3_id, cur_page)


